Part 1: PCA with Penguins

# Scale the variables, then break the variation down into principal components (PCs)

# Fit a PCA on body mass plus every "_mm" measurement. Rows with a missing
# value in any selected column are dropped, and scale() standardises each
# column before it is handed to prcomp().
penguins_pca <- prcomp(
  penguins %>%
    select(body_mass_g, ends_with("_mm")) %>%
    drop_na() %>%
    scale()
)

# Loadings matrix: one row per variable, one column per PC.
penguins_pca[["rotation"]]
##                          PC1         PC2        PC3        PC4
## body_mass_g        0.5483502 0.084362920 -0.5966001 -0.5798821
## bill_length_mm     0.4552503 0.597031143  0.6443012 -0.1455231
## bill_depth_mm     -0.4003347 0.797766572 -0.4184272  0.1679860
## flipper_length_mm  0.5760133 0.002282201 -0.2320840  0.7837987
# Above, `rotation` shows the loadings: the weight of each variable on each PC. These set the direction and length of each variable's arrow in the biplot.
# Below, autoplot() automatically selects the type of ggplot to make based on the object it is given. Since penguins_pca is a prcomp object (a list of PCA results), it assumes we want a PCA biplot.
# Make a data frame of penguin info that matches the PCA: it has the same set of observations, but also keeps info like species.

# Keep only the rows of `penguins` that have no missing value in body mass or
# any "_mm" column, while retaining every other column (e.g. species).
penguin_complete <- drop_na(penguins, body_mass_g, ends_with("_mm"))

# This MATCHING data supplies the aesthetics for the biplot: points are
# coloured by species, and the loading arrows are drawn and labelled.
autoplot(
  penguins_pca,
  data = penguin_complete,
  colour = "species",
  loadings = TRUE,
  loadings.label = TRUE
) +
  theme_minimal()
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

# All the customization in ggplot is available in autoplot
# The factoextra package is another great place to play with biplots/PCA

Part 2: New skills for ggplot customization + Reading in different file types

Starting with .xlsx file

# Read the landings workbook and tidy it:
#   1. clean_names() standardises the column names;
#   2. every character column is converted to lower case;
#   3. fish_name is nmfs_name without its last three characters
#      (end = -4 keeps everything through the fourth-from-last character);
#   4. only rows whose confidentiality is "public" are kept.
fish_noaa <- here("data", "foss_landings.xlsx") %>%
  read_excel() %>%
  clean_names() %>%
  mutate(
    across(where(is.character), tolower),
    # Evaluated after the across() step above, so nmfs_name is already lower case
    fish_name = str_sub(nmfs_name, end = -4)
  ) %>%
  filter(confidentiality == "public")
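# Instead of adding columns, mutate(across()) transforms existing columns in place. across() selects a set of columns (here every character column, via where(is.character); a helper such as ends_with("mm") would also work) and applies tolower() to each one, converting it to lower case.
# We then use str_sub() (from stringr) within mutate() to remove the last 3 characters of nmfs_name (end = -4 keeps everything up to the fourth-from-last character) and store the result in fish_name.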

Make a customized graph:

# Line chart of pounds against year, with one coloured line per fish_name;
# the colour legend is suppressed.
fish_plot <- ggplot(fish_noaa, aes(year, pounds)) +
  geom_line(mapping = aes(colour = fish_name), show.legend = FALSE) +
  theme_minimal()

fish_plot
## Warning: Removed 6 row(s) containing missing values (geom_path).

# ggplotly() converts the static ggplot into an interactive plotly graphic
ggplotly(fish_plot)

### use gghighlight to highlight certain series. 

# One line per fish_name, with gghighlight() emphasising only the series
# whose fish_name is "tunas". theme_minimal() is applied once, as the final
# layer; the earlier duplicate call had no effect on the result.
# NOTE(review): the predicate is row-wise, so gghighlight's grouped evaluation
# fails and it falls back to an ungrouped filter (see the warning below).
# Passing use_group_by = FALSE should skip that attempt - confirm against the
# gghighlight documentation before changing.
fish_plot_2 <- ggplot(
  data = fish_noaa,
  aes(x = year, y = pounds, group = fish_name)
) +
  geom_line() +
  gghighlight(fish_name == "tunas") + # Highlight just tunas
  theme_minimal()
## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...
## label_key: fish_name
fish_plot_2
## Warning: Removed 6 row(s) containing missing values (geom_path).
## Warning: Removed 6 row(s) containing missing values (geom_path).